#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 
# Copyright (c) 2017 Baidu.com, Inc. All Rights Reserved
# 

"""
File: unit6.py
Author: zhangyang(zhangyang40@baidu.com)
Date: 2018/2/5 2:39 PM
"""
import warnings

warnings.filterwarnings(action='ignore', category=UserWarning, module='gensim')
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
from gensim.models import word2vec

if __name__ == '__main__':
    # Script entry point: train word2vec on the text8 corpus, save the model,
    # run a few similarity queries, and build a bigram phrase detector.

    # Imported here because the top of the file only binds the `word2vec`
    # submodule; the bare name `gensim` is never defined, so the original
    # `gensim.models.Phrases(...)` call raised NameError.
    from gensim.models import Phrases

    sentences = word2vec.Text8Corpus('data/text8')
    # NOTE: gensim >= 4.0 renames `size` to `vector_size`; this call keeps the
    # older keyword the script was written against.
    model = word2vec.Word2Vec(sentences, size=200)
    model.save('data/text8.model')
    # To reuse the saved model instead of retraining:
    # model = word2vec.Word2Vec.load('data/text8.model')

    # Query through the KeyedVectors object: the model-level shortcuts
    # (model.most_similar / model.doesnt_match) are deprecated.
    word_vectors = model.wv
    similar = word_vectors.most_similar(
        positive=['woman', 'man', 'kiss', 'love'], negative=['girl'], topn=5)
    logging.info('most_similar: %s', similar)

    # Find the word that does not belong with the others.
    odd_one = word_vectors.doesnt_match(['fuck', 'head', 'foot', 'hand'])  # fuck
    logging.info('doesnt_match: %s', odd_one)
    odd_meal = word_vectors.doesnt_match("breakfast cereal dinner lunch".split())  # cereal
    logging.info('doesnt_match: %s', odd_meal)

    # Phrase (bigram) detector built from the same corpus; the transformed
    # corpus can be fed back into Word2Vec, e.g.:
    #     model = Word2Vec(bigram_transformer[sentences], size=100, ...)
    bigram_transformer = Phrases(sentences)
